# Data URL
data_url = "https://scrnaseq-workshop.s3-us-west-2.amazonaws.com"
# Import RNA matrix
rna_mtx <- file.path(data_url, "CITEseq_cDNA.csv.gz") %>%
read_csv() %>%
column_to_rownames("X1") %>%
as.sparse()
rna_mtx[1:10, 1:10]10 x 10 sparse Matrix of class "dgCMatrix"
MIR1302-2HG . . . . . . . . . .
FAM138A . . . . . . . . . .
OR4F5 . . . . . . . . . .
AL627309.1 . . . . . . . . . .
AL627309.3 . . . . . . . . . .
AL627309.2 . . . . . . . . . .
AL627309.4 . . . . . . . . . .
AL732372.1 . . . . . . . . . .
OR4F29 . . . . . . . . . .
AC114498.1 . . . . . . . . . .
# Import ADT matrix
adt_mtx <- file.path(data_url, "CITEseq_ADT.csv.gz") %>%
read_csv() %>%
column_to_rownames("X1") %>%
as.sparse()
rownames(adt_mtx) <- str_c("adt-", rownames(adt_mtx))
adt_mtx[, 1:10]8 x 10 sparse Matrix of class "dgCMatrix"
adt-CD14 11 2 14 1 . 1 1 . . 1
adt-CD19 9 . 165 295 . 3 6 4 2 1
adt-CD3 . 19 79 2 3 6 9 3 3 4
adt-CD4 69 360 293 7 159 321 1 7 7 4
adt-CD45 . . 3 4 . 1 . . 1 .
adt-CD45RA 25 3 246 16 8 5 18 6 4 9
adt-CD45RO 6 4 108 6 . 2 1 2 1 3
adt-CD8 7 . 36 6 . . 473 21 7 197
# Import HTO matrix
hto_mtx <- file.path(data_url, "CITEseq_HTO.csv.gz") %>%
read_csv() %>%
column_to_rownames("X1") %>%
as.sparse()
hto_mtx[, 1:10]4 x 10 sparse Matrix of class "dgCMatrix"
HTO28 351 . . . 2 161 . . . .
HTO29 6 2 3 1 1 2 2 107 2 .
HTO30 . 131 177 60 . . . . 239 155
HTO44 1 . . . 122 . 172 . 1 .
# Get list of common cell barcodes
rna_bcs <- colnames(rna_mtx)
adt_bcs <- colnames(adt_mtx)
hto_bcs <- colnames(hto_mtx)
merged_bcs <- rna_bcs %>%
intersect(adt_bcs) %>%
intersect(hto_bcs)
# Create Seurat object
sobj <- rna_mtx %>%
.[, rna_bcs %in% merged_bcs] %>%
CreateSeuratObject(min.cells = 5)
# Add antibody and cell hashing data to Seurat object
sobj[["ADT"]] <- adt_mtx %>%
.[, adt_bcs %in% merged_bcs] %>%
CreateAssayObject()
sobj[["HTO"]] <- hto_mtx %>%
.[, hto_bcs %in% merged_bcs] %>%
CreateAssayObject()
sobjAn object of class Seurat
15032 features across 4292 samples within 3 assays
Active assay: RNA (15020 features)
2 other assays present: ADT, HTO
# Normalize HTO counts
sobj <- sobj %>%
NormalizeData(
assay = "HTO",
normalization.method = "CLR" # Centered log-ratio normalization
)# Demultiplex samples
# By default HTODemux will look for the "HTO" assay
sobj <- sobj %>%
HTODemux()
sobj@meta.data %>%
head() orig.ident nCount_RNA nFeature_RNA nCount_ADT
AAACCTGAGACAAAGG SeuratProject 3054 1217 388
AAACCTGAGCTACCGC SeuratProject 2411 989 337
AAACCTGCACCAGGTC SeuratProject 4182 1394 509
AAACCTGCACTCTGTC SeuratProject 3681 1456 43
AAACCTGGTATGAAAC SeuratProject 2503 1255 530
AAACCTGGTGAGTATA SeuratProject 1241 692 352
nFeature_ADT nCount_HTO nFeature_HTO HTO_maxID
AAACCTGAGACAAAGG 5 358 3 HTO28
AAACCTGAGCTACCGC 8 180 2 HTO30
AAACCTGCACCAGGTC 7 163 2 HTO28
AAACCTGCACTCTGTC 6 174 2 HTO44
AAACCTGGTATGAAAC 6 638 2 HTO28
AAACCTGGTGAGTATA 6 71 2 HTO44
HTO_secondID HTO_margin HTO_classification
AAACCTGAGACAAAGG HTO29 3.211027 HTO28
AAACCTGAGCTACCGC HTO29 3.448779 HTO30
AAACCTGCACCAGGTC HTO29 2.802755 HTO28
AAACCTGCACTCTGTC HTO29 3.148890 HTO44
AAACCTGGTATGAAAC HTO29 3.649778 HTO28
AAACCTGGTGAGTATA HTO29 2.285112 HTO44
HTO_classification.global hash.ID
AAACCTGAGACAAAGG Singlet HTO28
AAACCTGAGCTACCGC Singlet HTO30
AAACCTGCACCAGGTC Singlet HTO28
AAACCTGCACTCTGTC Singlet HTO44
AAACCTGGTATGAAAC Singlet HTO28
AAACCTGGTGAGTATA Singlet HTO44
# Create ridge plots showing HTO signal
sobj %>%
RidgePlot(
assay = "HTO",
features = rownames(hto_mtx),
ncol = 2
)# Rename cell identities with sample names
sobj <- sobj %>%
RenameIdents(
"HTO28" = "PBMC-1",
"HTO29" = "PBMC-2",
"HTO30" = "PBMC-3",
"HTO44" = "PBMC-4"
)
# Add sample names to meta.data table
sobj <- sobj %>%
AddMetaData(
metadata = Idents(sobj),
col.name = "sample"
)# Use the meta.data table to compare the number of cells with each hash.ID
# sobj@meta.data %>%# Add mitochondrial percentage to meta.data table
sobj <- sobj %>%
PercentageFeatureSet(
assay = "RNA",
pattern = "^MT-",
col.name = "percent_mito"
)
# Create violin plots for gene expression data
sobj %>%
VlnPlot(
features = c("nCount_RNA", "nFeature_RNA", "percent_mito"),
ncol = 3,
pt.size = 0.25
)sobj %>%
VlnPlot(
features = c(
"nCount_ADT", "nFeature_ADT",
"nCount_HTO", "nFeature_HTO"
),
ncol = 2,
pt.size = 0.25,
log = T
)# Filter cells based on number of genes detected, percent mito UMIs, and HTO_classification.global
# filt_sobj <- sobj %>%
# subset(
#
#
#
#
# )An object of class Seurat
15032 features across 3644 samples within 3 assays
Active assay: RNA (15020 features)
2 other assays present: ADT, HTO
# Normalize expression data
filt_sobj <- filt_sobj %>%
SCTransform(
vars.to.regress = "percent_mito",
variable.features.n = 2000,
verbose = F
)
# Normalize and scale antibody data
filt_sobj <- filt_sobj %>%
NormalizeData(
assay = "ADT",
normalization.method = "CLR" # Centered log-ratio normalization
) %>%
ScaleData(assay = "ADT")# Perform PCA
filt_sobj <- filt_sobj %>%
RunPCA(verbose = F) # By default only variable features are used for PCA
# Plot standard deviations of principal components
elbow_plot <- filt_sobj %>%
ElbowPlot(ndims = 50)
# Create scatter plot comparing PC-1 and PC-2
PCA_plot <- filt_sobj %>%
DimPlot(reduction = "pca")
plot_grid(
elbow_plot, PCA_plot,
nrow = 1,
rel_widths = c(0.4, 0.6)
)# Cluster cells
filt_sobj <- filt_sobj %>%
FindNeighbors(dims = 1:40) %>%
FindClusters(
resolution = 0.6,
verbose = F
)
# Run UMAP
# Use same PCs as clustering, by default reduction = "pca"
filt_sobj <- filt_sobj %>%
RunUMAP(dims = 1:40)
# Find marker genes for each cluster
RNA_markers <- filt_sobj %>%
FindAllMarkers(only.pos = T) %>%
filter(p_val_adj < 0.05)
# Add RNA clusters to meta.data
filt_sobj <- filt_sobj %>%
AddMetaData(
metadata = Idents(filt_sobj),
col.name = "RNA_clusters"
)
# Create UMAP
filt_sobj %>%
DimPlot(reduction = "umap")# Since there are only a few antibodies, instead of doing PCA just use a standard
# euclidean distance matrix
adt_data <- filt_sobj@assays$ADT@data
adt_dist <- dist(t(adt_data))
# Find clusters using distance matrix
filt_sobj[["adt_snn"]] <- FindNeighbors(adt_dist)$snn
filt_sobj <- filt_sobj %>%
FindClusters(
assay = "ADT",
resolution = 0.2,
graph.name = "adt_snn"
)
# Add ADT clusters to meta.data
filt_sobj <- filt_sobj %>%
AddMetaData(
metadata = Idents(filt_sobj),
col.name = "ADT_clusters"
)# Run UMAP
filt_sobj <- filt_sobj %>%
RunUMAP(
reduction.name = "adt_umap",
reduction.key = "ADTUMAP_",
graph = "adt_snn"
)
# Create UMAP
filt_sobj %>%
DimPlot(reduction = "adt_umap")# Identify differentially expressed proteins for each cluster
# filt_sobj %>%
# FindAllMarkers(
#
# ) p_val avg_logFC pct.1 pct.2 p_val_adj cluster gene
1 0.000000e+00 1.9833363 1.000 0.935 0.000000e+00 0 adt-CD8
2 0.000000e+00 2.5471134 1.000 0.904 0.000000e+00 1 adt-CD4
3 4.599577e-147 1.6981277 0.894 0.349 3.679662e-146 3 adt-CD14
4 2.093009e-103 0.8156108 0.937 0.614 1.674407e-102 3 adt-CD45
5 5.462616e-102 1.3856033 0.987 0.864 4.370093e-101 3 adt-CD45RO
6 9.938762e-80 0.3601482 0.997 0.913 7.951009e-79 3 adt-CD19
7 3.042501e-10 0.4666591 0.997 0.981 2.434001e-09 3 adt-CD45RA
8 4.685913e-82 3.4338342 1.000 0.918 3.748730e-81 4 adt-CD19
9 1.807485e-20 0.4287725 0.992 0.982 1.445988e-19 4 adt-CD45RA
10 7.557375e-18 0.5735419 0.984 0.870 6.045900e-17 4 adt-CD45RO
# Set active.assay to ADT
filt_sobj@active.assay <- "ADT"
# Overlay antibody signal on gene expression UMAP
filt_sobj %>%
FeaturePlot(
reduction = "umap",
features = c("adt-CD3", "adt-CD4", "adt-CD8", "adt-CD19"),
cols = c("#3288bd", "#bd0026")
)# Overlay antibody signal on antibody UMAP
filt_sobj %>%
FeaturePlot(
reduction = "adt_umap",
features = c("adt-CD3", "adt-CD4", "adt-CD8", "adt-CD19"),
cols = c("#3288bd", "#bd0026")
)# Set ADT as the active assay
filt_sobj@active.assay <- "ADT"
# Create ridge plot
filt_sobj %>%
RidgePlot(features = c(
"adt-CD14", "adt-CD45",
"adt-CD19", "adt-CD3",
"adt-CD4", "adt-CD8"
))filt_sobj %>%
DoHeatmap(
features = rownames(filt_sobj),
angle = 0
) +
NoLegend()# Set ADT as the active assay
filt_sobj@active.assay <- "ADT"
# Plot CD3 and CD19 signal
filt_sobj %>%
FeatureScatter("adt-CD3", "adt-CD19")# Identify CD19+ cells using antibody signal
CD19_cells <- filt_sobj %>%
subset(`adt-CD19` > 2.5 & `adt-CD3` < 1) %>%
Cells()
# Set cell identities
labeled_sobj <- filt_sobj %>%
SetIdent(value = "Other") %>%
SetIdent(
value = "CD19+",
cells = CD19_cells
)
labeled_sobj@active.ident %>%
head()AAACCTGAGACAAAGG AAACCTGCACCAGGTC AAACCTGGTATGAAAC AAACGGGTCACCTTAT
Other Other Other Other
AAAGATGAGTGTACCT AAAGATGCACAGCCCA
Other Other
Levels: CD19+ Other
# Label UMAP with new cell identities
labeled_sobj %>%
DimPlot(reduction = "adt_umap")# Plot CD3 and CD19 signal
CD19_plot <- filt_sobj %>%
FeatureScatter("adt-CD3", "adt-CD19")
CD19_plot
# Identify CD19+ cells using antibody signal
labeled_sobj <- filt_sobj %>%
SetIdent(value = "Other")
labeled_sobj <- CellSelector(
plot = CD19_plot,
object = labeled_sobj,
ident = "B cells"
)
labeled_sobj@active.ident %>%
head()
# Label UMAP with new cell identities
labeled_sobj %>%
DimPlot(reduction = "adt_umap")# Plot CD4 and CD8 signal
# filt_sobj %>%
# FeatureScatter("adt_CD4", "adt_CD8")
# Identify CD4+ and CD8+ cells using antibody signal
# Set cell identities
# Label UMAP with new cell identities# Combine RNA and ADT matrices
merged_sobj <- labeled_sobj
RNA_data <- merged_sobj@assays$SCT@data
ADT_data <- merged_sobj@assays$ADT@data
merged_data <- rbind(RNA_data, ADT_data)
# Add merged matrix to Seurat object
merged_sobj@assays$SCT@data <- merged_data
# Set active assay
merged_sobj@active.assay <- "SCT"# Create Cell Browser directories
dir.create(
path = "cellbrowser/SCT",
recursive = T
)
# Export marker genes
RNA_markers %>%
rename(score = p_val_adj) %>%
select(cluster, gene, score) %>%
write_tsv("cellbrowser/SCT/RNA_markers.tsv")
# Create Cell Browser files for gene expression data
merged_sobj %>%
ExportToCellbrowser(
dir = "cellbrowser/SCT",
reductions = "umap",
dataset.name = "SCT",
markers.file = "cellbrowser/SCT/RNA_markers.tsv",
sample = "Sample",
RNA_clusters = "Cluster",
ADT_clusters = "ADT cluster",
cell_label = "Cell label",
nCount_RNA = "RNA UMI count",
nFeature_RNA = "Gene count",
percent_mito = "Percent mito UMIs",
nCount_ADT = "ADT UMI count",
nFeature_ADT = "Antibody count",
nCount_HTO = "HTO UMI count",
nFeature_HTO = "HTO count"
)# Create Cell Browser directories
dir.create(
path = "cellbrowser/ADT",
recursive = T
)
# Export marker genes
ADT_markers %>%
rename(score = p_val_adj) %>%
select(cluster, gene, score) %>%
write_tsv("cellbrowser/ADT/ADT_markers.tsv")
# Create Cell Browser files for antibody data
merged_sobj %>%
ExportToCellbrowser(
dir = "cellbrowser/ADT",
reductions = "adt_umap",
dataset.name = "ADT",
markers.file = "cellbrowser/ADT/ADT_markers.tsv",
sample = "Sample",
ADT_clusters = "Cluster",
RNA_clusters = "RNA cluster",
cell_label = "Cell label",
nCount_RNA = "RNA UMI count",
nFeature_RNA = "Gene count",
percent_mito = "Percent mito UMIs",
nCount_ADT = "ADT UMI count",
nFeature_ADT = "Antibody count",
nCount_HTO = "HTO UMI count",
nFeature_HTO = "HTO count"
)
mkdir -p cellbrowser/html
cbBuild \
-i cellbrowser/SCT/cellbrowser.conf \
-i cellbrowser/ADT/cellbrowser.conf \
-o cellbrowser/html